Tracking the Growth of COVID-19 Cases in Japan

データソース

以下のダッシュボードのGitHubリポジトリ（swsoyee/2019-ncov-japan）で公開されているCSVを使用しています。

In [1]:
# Now
! date
2020年  4月 12日 日曜日 10:50:06 JST
In [2]:
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
from termcolor import colored, cprint

#thread
import threading
import concurrent.futures

import plotly.graph_objects as go
#import plotly.offline as offline
#offline.init_notebook_mode(connected=True)

from matplotlib.dates import DateFormatter
from matplotlib import rcParams
import matplotlib.pyplot as plt
%matplotlib inline
#%matplotlib notebook  # If you switch from inline to notebook, you must restart the kernel!

# Configure matplotlib to use a Japanese-capable font:
# select the sans-serif family and list candidate Japanese fonts for it.
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Hiragino Maru Gothic Pro', 'Yu Gothic', 'Meirio', 'Takao', 'IPAexGothic', 'IPAPGothic', 'VL PGothic', 'Noto Sans CJK JP']

データセット

In [3]:
# Load the by-date CSV from the dashboard's GitHub repository.
df_raw = pd.read_csv( 'https://raw.githubusercontent.com/swsoyee/2019-ncov-japan/master/Data/byDate.csv')

# Drop the LAST row when every column after the first is NaN.
# NOTE: only the final row is inspected; all-NaN rows elsewhere are kept
# (and later turned into zeros by fillna).
if( df_raw.iloc[-1:,1:].isnull().all(axis=1).values[0] == True ):
    df_raw = df_raw.drop(df_raw.index.values[-1])
    print('drop [.isnull().all()] record')

# Parse the 'date' column (YYYYMMDD) into datetimes, and keep the same dates
# as 'mm/dd/yy' strings for use as column labels further down.
df_raw['date'] = pd.to_datetime(df_raw['date'], format='%Y%m%d')
days = df_raw['date'].dt.strftime('%m/%d/%y').values.tolist()

# Replace missing values with 0.
df_raw = df_raw.fillna(0)

# Per-day increases, plus a "日本全体" (all of Japan) column that sums every
# region column except the cruise ship and the charter flights.
# NOTE: `inc` is an alias of df_raw, not a copy, so the new column is added
# to df_raw as well.
inc = df_raw
inc['日本全体'] = inc.drop(columns=['クルーズ船','チャーター便']).iloc[:,1:].sum(axis=1)

# Cumulative totals: keep the first (date) column and cumsum all the others.
tot = pd.concat( [inc.iloc[:,0:1], inc.iloc[:,1:].cumsum()], axis=1)

# Reshape to the same layout as the notebook this was adapted from (the
# global version): one row per region, one column per date, preceded by
# 'Province/State', 'Country/Region', 'Lat', 'Long'.
tmp = tot.set_index('date').T.reset_index()
tmp.columns.name = None
tmp = tmp.rename(columns={'index':'Country/Region'})
# Placeholder columns; they are filled with NaN only.
tmp['Province/State'] = np.nan
tmp['Lat'] = np.nan
tmp['Long'] = np.nan
# tmp.iloc[:, 1:-3] selects the date columns: everything between the region
# name (first column) and the three placeholder columns appended just above.
df = pd.concat([tmp[['Province/State', 'Country/Region', 'Lat', 'Long']], tmp.iloc[:, 1:-3]], axis=1)
# Relabel the date columns with the 'mm/dd/yy' strings built earlier.
df.columns = df.columns[0:4].values.tolist() + days
In [4]:
 df.head()
Out[4]:
Province/State Country/Region Lat Long 01/15/20 01/16/20 01/17/20 01/18/20 01/19/20 01/20/20 ... 04/03/20 04/04/20 04/05/20 04/06/20 04/07/20 04/08/20 04/09/20 04/10/20 04/11/20 04/12/20
0 NaN 北海道 NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 ... 188.0 191.0 191.0 191.0 195.0 205.0 223.0 236.0 252.0 252.0
1 NaN 青森 NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 ... 11.0 11.0 11.0 11.0 12.0 12.0 14.0 17.0 22.0 22.0
2 NaN 岩手 NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 NaN 宮城 NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 ... 18.0 20.0 23.0 26.0 32.0 34.0 36.0 38.0 45.0 45.0
4 NaN 秋田 NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 ... 10.0 11.0 11.0 11.0 11.0 11.0 11.0 13.0 15.0 15.0

5 rows × 93 columns

In [ ]:
 
In [5]:
# Latest cumulative count per region: positional column 1 is the region name
# and the last column is the most recent date.
cases = (
    df.iloc[:, [1, -1]]
    .groupby('Country/Region')
    .sum()
)
mostrecentdate = cases.columns[0]
print('\nTotal number of cases (in countries with at least 100 cases) as of', mostrecentdate)

# Order from largest to smallest, then keep regions with at least 100 cases
# (a threshold of 20 was used as an alternative during exploration).
cases = (
    cases
    .sort_values(by=mostrecentdate, ascending=False)
    .loc[lambda frame: frame[mostrecentdate] >= 100]
)
cases.head()
Total number of cases (in countries with at least 100 cases) as of 04/12/20
Out[5]:
04/12/20
Country/Region
日本全体 6907.0
東京 1902.0
大阪 766.0
クルーズ船 712.0
神奈川 513.0

Curve Fitting COVID-19 Cases in Japan

In [6]:
class CurveF(object):
    """Fit linear, logistic and exponential models to one (x, y) series.

    Each model is fitted independently with ``scipy.optimize.curve_fit``.
    Results are stored per model in ``self.fitinfo[name]`` under ``'popt'``
    and ``'pcov'``; both stay ``None`` for a model whose fit failed.
    """

    @staticmethod
    def liner(t, a, b):
        """Straight line ``a * t + b`` (the name is also the ``fitinfo`` key)."""
        return(a * t + b)

    @staticmethod
    def logistic(t, a, b, c, d):
        """Logistic curve ``c + (d - c) / (1 + a * exp(-b * t))``."""
        return c + (d - c)/(1 + a * np.exp(- b * t))

    @staticmethod
    def exponential(t, a, b, c):
        """Exponential curve ``a * exp(b * t) + c``."""
        return a * np.exp(b * t) + c

    def __init__(self, x=None, y=None, maxfev=1000000):
        """Store the data and set up the table of models to fit.

        Args:
            x: Independent variable (array-like) or None.
            y: Dependent variable (array-like) or None.
            maxfev: Forwarded to ``curve_fit`` as ``maxfev``.
        """
        self.x = None
        self.y = None
        self.maxfev = maxfev

        self.set_xy(x, y)

        # 'para' holds extra keyword arguments passed to curve_fit for that model.
        self.fitinfo = {
            'liner'      :{ 'func':  CurveF.liner,        'popt': None, 'pcov': None, 'para': dict()},
            'logistic'   :{ 'func':  CurveF.logistic,     'popt': None, 'pcov': None, 'para': dict()},
            'exponential':{ 'func':  CurveF.exponential,  'popt': None, 'pcov': None, 'para': dict(bounds=([0,0,-100],[100,0.9,100]))}
        }

    def set_xy(self, x, y):
        """Set the data to fit.

        Anything that is not already a NumPy array is converted to a float
        array; existing arrays are stored as given. ``None`` is kept as None.
        """
        self.x = x
        self.y = y
        # The builtin `float` is used because the `np.float` alias has been
        # removed from NumPy.
        if self.x is not None and not isinstance(self.x, np.ndarray):
            self.x = np.array(self.x, dtype=float)
        if self.y is not None and not isinstance(self.y, np.ndarray):
            # Assign to self.y; writing the result to self.x would clobber x.
            self.y = np.array(self.y, dtype=float)

    def fit(self, verbose=None, idstr=None):
        """Fit every model in ``self.fitinfo`` to ``(self.x, self.y)``.

        Args:
            verbose: None or 0 prints no progress; 1 prints one start line
                and one finish line; 2 or more prints a line per model plus
                the finish line.
            idstr: Optional label shown as ``[idstr]`` in front of messages.

        A model whose fit raises is reported with a printed message and its
        ``'popt'`` / ``'pcov'`` are set to None; the remaining models are
        still fitted.
        """
        # Build the prefix up front so it is defined on every path, including
        # the error message below when verbose is off.
        prtstr = f'[{idstr}]' if idstr is not None else ''

        for i, (key, val) in enumerate(self.fitinfo.items()):
            if verbose:
                if verbose == 1 and i == 0:
                    print(f'{prtstr} fitting... ')
                if verbose >= 2:
                    print(f'{prtstr} fitting... {key}')

            popt = None
            pcov = None
            try:
                popt, pcov = curve_fit(val['func'], self.x, self.y, maxfev=self.maxfev, **val['para'])
            except Exception:
                # Best effort: report and carry on with the next model.
                # `Exception` (not a bare except) lets KeyboardInterrupt through.
                print(f"{prtstr} exception!!")
            val['popt'] = popt
            val['pcov'] = pcov

        if verbose:
            print(f'{prtstr} finish.')

    def calc(self, fitname, x ):
        """Evaluate the fitted model ``fitname`` at ``x``.

        Returns:
            The model values, or an empty list when that model has no fitted
            parameters (not fitted yet, or the fit failed).
        """
        fdic = self.fitinfo[fitname]
        if fdic['popt'] is None:
            return []
        return fdic['func'](x, *fdic['popt'])
In [7]:
# Cumulative series for Tokyo ('東京'); use '日本全体' instead for all of Japan.
# The first four columns are metadata, so the date columns start at position 4.
tokyo = df.loc[df['Country/Region'] == '東京']
y = tokyo.iloc[0, 4:].values
x = np.arange(y.shape[0])

時系列でアニメーションにする

データの準備

In [8]:
# Prepare the data set: the cumulative series for all of Japan ('日本全体').
obs_df = df[ df['Country/Region'] == '日本全体']
obsY = obs_df.iloc[0,4:].values
# Per-day increase recovered from the cumulative series (a 0 is prepended so
# the result has the same length as obsY).
obsInc = np.diff(np.insert(obsY, 0, 0))

tsize = len(obsY)
tstart = 20   # the first animation frame uses the first `tstart` days

# One frame for every prefix length from tstart up to tsize, inclusive.
days_num  =  tsize - tstart + 1
days_arange = np.arange(days_num)

# Build one entry per frame: the data observed up to that day and a CurveF
# instance that will be fitted to it.
datafits = []
for offset in days_arange:
    at = tstart + offset
    at_obsY = obsY[0:at]
    datafits.append({
        'at': at,
        'day_offset': offset,
        'data_Y': at_obsY,
        'curve': CurveF( np.arange(len(at_obsY)), y=at_obsY),
        'data_Inc': obsInc[0:at],
    })

# Fit every frame, using a thread pool unless maxthreadd is 0.
maxthreadd = 10
if( maxthreadd == 0):
    for i, one in enumerate(datafits):
        one['curve'].fit(verbose=2,idstr=f'{i:03d}')
else:
    print(f"thread pool start !!  -  num={maxthreadd}")
    # Leaving the with-block waits for all submitted fits to finish.
    with concurrent.futures.ThreadPoolExecutor(max_workers=maxthreadd) as executor:
        # verbose is left at its default so the workers print no progress.
        futures = {
            executor.submit(one['curve'].fit, idstr=f'{i:03d}'): i
            for i, one in enumerate(datafits)
        }
        count = len(futures)
        print(f"thread pool {count} submmit")
    # An exception raised inside a worker is stored on its future; report it
    # here instead of letting it vanish silently.
    for future, i in futures.items():
        error = future.exception()
        if error is not None:
            print(f"[{i:03d}] fit failed: {error!r}")
    print("main thread finish!!")
thread pool start !!  -  num=10
thread pool 70 submmit
main thread finish!!

plotlyでアニメーション

In [9]:
# Turn each fitted data set into the list of plotly traces for one frame:
# observed totals, observed daily increases, and the three fitted curves.
data_series = []
for one in datafits:
    at = one['at']
    at14 = at + 14                 # fitted curves are drawn 14 days past the data
    curve = one['curve']
    observed_x = np.arange(at)
    fitted_x = np.arange(at14)

    ad = [
        # observed total
        dict(x=observed_x, y=one['data_Y'], type="scatter", mode="markers", name="Total"),
        # observed increase
        dict(x=observed_x, y=one['data_Inc'], type="bar", name="Increase"),
    ]
    # fitted curves: (model name, line dash style)
    for model, dash in (("liner", "dash"), ("logistic", "dash"), ("exponential", "dot")):
        ad.append(
            dict(
                x=fitted_x,
                y=curve.calc(model, fitted_x),
                mode="lines",
                line=dict(dash=dash),
                name=model,
            )
        )
    data_series.append(ad)
In [10]:
# Assemble the animated figure as a plain dict, then hand it to plotly.
fig_dict = dict(data=[], layout={}, frames=[])

# Play / Pause buttons.
fig_dict["layout"]["updatemenus"] = [
    dict(
        buttons=[
            dict(
                label="Play",
                method="animate",
                args=[
                    None,
                    dict(
                        frame=dict(duration=200, redraw=False),
                        fromcurrent=True,
                        transition=dict(duration=100, easing="quadratic-in-out"),
                    ),
                ],
            ),
            dict(
                label="Pause",
                method="animate",
                args=[
                    [None],
                    dict(
                        frame=dict(duration=0, redraw=False),
                        mode="immediate",
                        transition=dict(duration=0),
                    ),
                ],
            ),
        ],
        direction="left",
        pad=dict(r=10, t=87),
        showactive=False,
        type="buttons",
        x=0.1,
        xanchor="right",
        y=0,
        yanchor="top",
    )
]

# Slider skeleton; one step per frame is appended below.
sliders_dict = dict(
    active=0,
    yanchor="top",
    xanchor="left",
    currentvalue=dict(
        font=dict(size=20),
        prefix="Day ",
        visible=True,
        xanchor="right",
    ),
    transition=dict(duration=300, easing="cubic-in-out"),
    pad=dict(b=10, t=50),
    len=0.9,
    x=0.1,
    y=0,
    steps=[],
)

# The figure initially shows the traces of the last frame.
fig_dict["data"] = data_series[-1]

# One frame and one matching slider step per day offset.
for offset in days_arange:
    frame = dict(data=data_series[offset], name=str(offset))
    fig_dict["frames"].append(frame)

    slider_step = dict(
        args=[
            [offset],
            dict(
                frame=dict(duration=300, redraw=False),
                mode="immediate",
                transition=dict(duration=300),
            ),
        ],
        label=str(offset),
        method="animate",
    )
    sliders_dict["steps"].append(slider_step)

fig_dict["layout"]["sliders"] = [sliders_dict]
fig = go.Figure(fig_dict)

kwparam = dict(width=950, height=700, autosize=False)
fig.update_layout(title='罹患者数推移', xaxis_title='確認日', yaxis_title='人数', **kwparam)
fig.show()
In [11]:
# Show the same animated figure again with a logarithmic Y axis.
# The size settings are re-applied together with the new title.
kwparam = {'width':950 , 'height':700, 'autosize':False}
fig.update_layout(title='罹患者数推移(Y軸対数)',  xaxis_title='確認日', yaxis_title='人数', **kwparam)
# Switch the Y axis type to log in a separate layout update.
fig.update_layout(yaxis = { "type": "log"} )
fig.show()
In [ ]:
 

罹患者数推移 (X軸=総数、Y軸=増加数)

In [12]:
# Animation of daily increase (Y) against cumulative total (X) for all of Japan.

# make data
obs_df = df[ df['Country/Region'] == '日本全体']
obsY = obs_df.iloc[0,4:].values
# Per-day increase recovered from the cumulative series.
obsInc = np.diff(np.insert(obsY, 0, 0))

tsize = len(obsY)
tstart = 20   # the first frame uses the first `tstart` days

# One frame for every prefix length from tstart up to tsize, inclusive.
days_num  =  tsize - tstart + 1
days_arange = np.arange(days_num)

# make figure
fig_dict = {
    "data": [],
    "layout": {},
    "frames": []
}

# Play / Pause buttons.
fig_dict["layout"]["updatemenus"] = [
    {
        "buttons": [
            {
                "args": [None, {"frame": {"duration": 200, "redraw": False},
                                "fromcurrent": True, "transition": {"duration": 100,
                                                                    "easing": "quadratic-in-out"}}],
                "label": "Play",
                "method": "animate"
            },
            {
                "args": [[None], {"frame": {"duration": 0, "redraw": False},
                                  "mode": "immediate",
                                  "transition": {"duration": 0}}],
                "label": "Pause",
                "method": "animate"
            }
        ],
        "direction": "left",
        "pad": {"r": 10, "t": 87},
        "showactive": False,
        "type": "buttons",
        "x": 0.1,
        "xanchor": "right",
        "y": 0,
        "yanchor": "top"
    }
]

# Slider skeleton; one step per frame is appended below.
sliders_dict = {
    "active": 0,
    "yanchor": "top",
    "xanchor": "left",
    "currentvalue": {
        "font": {"size": 20},
        "prefix": "Day ",
        "visible": True,
        "xanchor": "right"
    },
    "transition": {"duration": 300, "easing": "cubic-in-out"},
    "pad": {"b": 10, "t": 50},
    "len": 0.9,
    "x": 0.1,
    "y": 0,
    "steps": []
}

# One single-trace list per frame: increase vs. total for the first `at` days.
data_series = []
for offset in days_arange:
    at = tstart + offset
    ad =  [
        # observed total (x) against observed increase (y)
        {
            "x":  obsY[0:at],
            "y":  obsInc[0:at],
            "type": "scatter",
            "mode": "lines",
            "name": "Total"
        },
    ]
    data_series.append(ad)

# The figure initially shows the traces of the last frame.
fig_dict["data"] += data_series[-1]

# make frames
# Iterate over days_arange, the same range data_series was built from, so the
# final day also gets a frame and a slider step. (A loop over
# range(tsize - tstart) stops one short and leaves the last day unreachable.)
for offset in days_arange:
    frame = {"data": [], "name": str(offset)}
    frame["data"] += data_series[offset]
    fig_dict["frames"].append(frame)

    slider_step = {"args": [
        [offset],
        {"frame": {"duration": 300, "redraw": False},
         "mode": "immediate",
         "transition": {"duration": 300}}
    ],
        "label": str(offset),
        "method": "animate"}
    sliders_dict["steps"].append(slider_step)

fig_dict["layout"]["sliders"] = [sliders_dict]

fig = go.Figure(fig_dict)

kwparam = {'width':950 , 'height':700, 'autosize':False}
fig.update_layout(title='# 罹患者数推移 (X軸=総数、Y軸=増加数)',  xaxis_title='総数', yaxis_title='増加数', **kwparam)
fig.show()
In [13]:
# Show the total-vs-increase animation again with both axes on a log scale,
# and change the title to say so.
fig.update_layout(xaxis = { "type": "log"}, yaxis = { "type": "log"}  )
fig.update_layout(title='# 罹患者数推移 両軸対数 (X軸=総数、Y軸=増加数)' )
fig.show()
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: